# -*- coding: utf-8 -*-
import scrapy
from scrapy import Request, FormRequest
from scrapy.exceptions import IgnoreRequest
from zc_core.util.http_util import retry_request
from zc_core.model.items import Box
from fjzfcg.rules import *
from fjzfcg.utils.captcha import Captcha
from zc_core.spiders.base import BaseSpider


class SpuSpider(BaseSpider):
    """Scrapy spider ``spu``: catalogs, brands, suppliers and per-shop goods lists.

    Crawl flow, as wired by the callbacks below:

    * ``start_requests`` authenticates through ``Captcha`` and seeds three
      requests: the catalog page, the brand home page and page 1 of the
      supplier list.
    * catalog page -> ``parse_catalog`` -> ``Box('catalog', ...)``
    * brand home -> ``parse_brand_home`` -> one request per type id ->
      ``parse_brand`` -> ``Box('brand', ...)``
    * supplier list -> ``parse_shop_page_total`` -> one POST per page ->
      ``parse_shop_list`` -> ``Box('supplier', ...)`` plus one goods-list
      request per shop -> ``parse_spu_total_page`` -> one request per page ->
      ``parse_spu`` -> ``Box('spu', ...)``

    NOTE(review): ``self.batch_no`` and ``self.error_back`` are not defined in
    this class; presumably they are provided by ``BaseSpider`` -- confirm.
    The module-level ``parse_*`` functions called inside the methods are not
    defined in this file either; presumably they come from the star import of
    ``fjzfcg.rules``. Inside a method body a bare ``parse_xxx(...)`` resolves
    to that module-level function, not to the same-named method.
    """

    name = 'spu'

    # Frequently used URLs
    catalog_url = 'http://120.35.30.176/shopping/goodsclass.htm'
    brand_home_url = 'http://120.35.30.176/shopping/storefilter.htm'
    brand_url = 'http://120.35.30.176/shopping/storefilter.htm?goodsTypeId={}'
    shop_list_url = 'http://120.35.30.176/shopping/storefilter_ajax.htm'
    shop_index_url = 'http://120.35.30.176/shopping/goodslist.htm?orderBy=addTime&orderType=desc&store_id={}&currentPage={}&xToken={}'
    sku_list_url = 'http://120.35.30.176/shopping/search.htm'

    # Fixed form values sent with every supplier-list POST.
    # NOTE(review): the business meaning of these two values is not visible
    # in this file -- confirm against the target site before changing them.
    _shop_type_id = '000000005949243b015949272c6d0001'
    _shop_zone = '3500'

    def __init__(self, batchNo=None, *args, **kwargs):
        """Forward ``batchNo`` and any extra spider arguments to the base class."""
        super(SpuSpider, self).__init__(batchNo=batchNo, *args, **kwargs)

    def _shop_list_formdata(self, page):
        """Return the POST form for page ``page`` of the supplier list."""
        return {
            'currentPage': str(page),
            'typeid': self._shop_type_id,
            'isAll': '1',
            'search_entere_zonename': self._shop_zone,
        }

    def start_requests(self):
        """Authenticate, then seed the catalog, brand and supplier crawls.

        Stores the ``cookie`` and ``xtoken`` entries of the auth result on the
        spider so that every later request can reuse them.

        Raises:
            Exception: if ``Captcha().store_goods_regist()`` returns a falsy
                result.
        """
        auth = Captcha().store_goods_regist()
        if not auth:
            raise Exception('鉴权验证失败')
        self.cookies = auth.get('cookie')
        self.x_token = auth.get('xtoken')

        # Catalog
        yield Request(
            url=self.catalog_url,
            meta={
                'batchNo': self.batch_no,
            },
            cookies=self.cookies,
            callback=self.parse_catalog,
            errback=self.error_back,
            priority=200,
            dont_filter=True
        )
        # Brand
        yield Request(
            url=self.brand_home_url,
            meta={
                'batchNo': self.batch_no,
            },
            cookies=self.cookies,
            callback=self.parse_brand_home,
            errback=self.error_back,
            priority=200,
            dont_filter=True
        )
        # Supplier: page 1 is fetched only to learn the total page count.
        yield FormRequest(
            method='POST',
            url=self.shop_list_url,
            formdata=self._shop_list_formdata(1),
            # Carry the batch number like every other request of this spider.
            meta={
                'batchNo': self.batch_no,
            },
            cookies=self.cookies,
            callback=self.parse_shop_page_total,
            errback=self.error_back,
            priority=10,
            dont_filter=True
        )

    def parse_catalog(self, response):
        """Yield a ``catalog`` Box when the catalog page parses to a non-empty result."""
        cats = parse_catalog(response)
        if cats:
            self.logger.info('品类: count[%s]', len(cats))
            yield Box('catalog', self.batch_no, cats)

    def parse_brand_home(self, response):
        """Schedule one brand-list request per type id found on the brand home page."""
        # Fall back to an empty list so an empty/None parse result does not
        # crash on len() below.
        type_ids = parse_brand_home(response) or []
        self.logger.info('品牌分类: total[%s]', len(type_ids))
        for type_id in type_ids:
            yield Request(
                url=self.brand_url.format(type_id),
                meta={
                    'batchNo': self.batch_no,
                    'typeId': type_id,
                },
                cookies=self.cookies,
                callback=self.parse_brand,
                errback=self.error_back,
                priority=10,
                dont_filter=True
            )

    def parse_brand(self, response):
        """Yield a ``brand`` Box when the brand page parses to a non-empty result."""
        brands = parse_brand(response)
        if brands:
            self.logger.info('品牌: count[%s]', len(brands))
            yield Box('brand', self.batch_no, brands)

    def parse_shop_page_total(self, response):
        """Read the supplier-list page count and schedule a POST for every page."""
        total = parse_shop_page_total(response)
        if not total:
            return

        self.logger.info('供应商列表: total[%s]页', total)
        for page in range(1, total + 1):
            yield FormRequest(
                method='POST',
                url=self.shop_list_url,
                formdata=self._shop_list_formdata(page),
                meta={
                    'batchNo': self.batch_no,
                    'page': page,
                },
                cookies=self.cookies,
                callback=self.parse_shop_list,
                errback=self.error_back,
                priority=10,
                dont_filter=True
            )

    def parse_shop_list(self, response):
        """Yield a ``supplier`` Box, then request page 1 of each shop's goods list."""
        shops = parse_shop_list(response)
        if not shops:
            return

        self.logger.info('供应商: count[%s]', len(shops))
        yield Box('supplier', self.batch_no, shops)

        for shop in shops:
            shop_id = shop.get('id')
            yield Request(
                url=self.shop_index_url.format(shop_id, 1, self.x_token),
                meta={
                    'batchNo': self.batch_no,
                    'shopId': shop_id,
                },
                cookies=self.cookies,
                callback=self.parse_spu_total_page,
                errback=self.error_back,
                priority=10,
                dont_filter=True
            )

    def parse_spu_total_page(self, response):
        """Read a shop's goods-list page count and schedule a request for every page."""
        total = parse_total_page(response)
        self.logger.info('总页数: total=%s', total)
        if not total:
            # No page count could be read (None) or the shop has no pages (0):
            # nothing to schedule, and ``total + 1`` would fail on None.
            return

        # The shop id is the same for every page, so read it once.
        shop_id = response.meta.get('shopId')
        for page in range(1, total + 1):
            yield Request(
                url=self.shop_index_url.format(shop_id, page, self.x_token),
                meta={
                    'batchNo': self.batch_no,
                    'shopId': shop_id,
                    'page': page,
                },
                cookies=self.cookies,
                callback=self.parse_spu,
                errback=self.error_back,
                priority=100,
                dont_filter=True
            )

    def parse_spu(self, response):
        """Yield an ``spu`` Box for one goods-list page, or log that the page is empty."""
        meta = response.meta
        cur_page = meta.get('page')
        shop_id = meta.get('shopId')

        spu_list = parse_spu(response)
        if spu_list:
            self.logger.info('清单: shop=%s, page=%s, cnt=%s', shop_id, cur_page, len(spu_list))
            yield Box('spu', self.batch_no, spu_list)
        else:
            self.logger.info('分页为空: page=%s', cur_page)
